Take Home Exercise 3

Show the code
pacman::p_load(jsonlite, tidygraph, ggraph, dplyr,
               visNetwork, graphlayouts, ggforce, knitr, kableExtra,
               skimr, tidytext, tidyverse, igraph, ggplot2, RColorBrewer)
Show the code
mc3_data <- fromJSON("data/MC3.json")
Show the code
mc3_edges <- as_tibble(mc3_data$links) %>% 
  distinct() %>%
  mutate(source = as.character(source),
         target = as.character(target),
         type = as.character(type)) %>%
  group_by(source, target, type) %>%
    summarise(weights = n()) %>%
  filter(source!=target) %>%
  ungroup()
Show the code
mc3_nodes <- as_tibble(mc3_data$nodes) %>%
  mutate(country = as.character(country),
         id = as.character(id),
         product_services = as.character(product_services),
         revenue_omu = as.numeric(as.character(revenue_omu)),
         type = as.character(type)) %>%
  select(id, country, type, revenue_omu, product_services)
Show the code
DT :: datatable(mc3_edges,options = list(lengthMenu = c(3, 10, 20)))
Show the code
DT::datatable(mc3_nodes,options = list(lengthMenu = c(3, 10, 20)))
Show the code
token_nodes <- mc3_nodes %>%
  unnest_tokens(word, 
                product_services)
Show the code
stopwords_removed <- token_nodes %>% 
  anti_join(stop_words)
Show the code
unique_words <- stopwords_removed %>%
  count(word, sort = TRUE)
Show the code
id1 <- mc3_edges %>%
  select(source) %>%
  rename(id = source)
id2 <- mc3_edges %>%
  select(target) %>%
  rename(id = target)
mc3_nodes1 <- rbind(id1, id2) %>%
  distinct() %>%
  left_join(mc3_nodes,
            unmatched = "drop")
Show the code
mc3_graph <- tbl_graph(nodes = mc3_nodes1,
                       edges = mc3_edges,
                       directed = FALSE) %>%
  mutate(betweenness_centrality = centrality_betweenness(),
         degree_centrality = centrality_degree())
mc3_df <- as.data.frame(mc3_graph, what = "both")
Show the code
ggplot(mc3_df,aes(x = type, y = betweenness_centrality)) +                 # Draw ggplot2 boxplot
  geom_boxplot() +
  stat_summary(fun = mean, geom = "point", col = "red") +  # Add points to plot
  stat_summary(fun = mean, geom = "text", col = "red",     # Add text to plot
               vjust = 1.5, aes(label = paste("Mean:", round(..y.., digits = 1))))

Show the code
library(dplyr)
library(tidyr)

# Calculate mean and quartiles
mean_betweenness <- mean(mc3_df$betweenness_centrality, na.rm = TRUE)
quartiles_betweenness <- quantile(mc3_df$betweenness_centrality, probs = c(0.5,0.9,0.95,0.99), na.rm = TRUE)
max_betweenness <- max(mc3_df$betweenness_centrality, na.rm = TRUE)

mean_degree <- mean(mc3_df$degree_centrality, na.rm = TRUE)
quartiles_degree <- quantile(mc3_df$degree_centrality, probs = c(0.50,0.90,0.95), na.rm = TRUE)
max_degree <- max(mc3_df$degree_centrality, na.rm = TRUE)
Show the code
# Create a data frame for betweenness centrality
betweenness_df <- data.frame(
  Measure = c("Mean", "Quartile 0.50 (Median)", "Quartile 0.90", "Quartile 0.95", "Quartile 0.99", "Maximum"),
  Value = c(mean_betweenness, quartiles_betweenness, max_betweenness)
)

# Create a data frame for degree centrality
degree_df <- data.frame(
  Measure = c("Mean", "Quartile 0.50 (Median)", "Quartile 0.90","Quartile 0.95","Maximum"),
  Value = c(mean_degree, quartiles_degree,max_degree)
)
Show the code
# Create the table for betweenness centrality
#| fig-height: 4
betweenness_df %>%
  kbl() %>%
  kable_paper("hover", full_width = F) %>%
  row_spec(0, bold = T, color = "white", background = "#D7261E")
Measure Value
Mean 8393.619
Quartile 0.50 (Median) 0.000
Quartile 0.90 3.000
Quartile 0.95 176.400
Quartile 0.99 165050.000
Maximum 3849384.703
Show the code
# Create the table for degree centrality
#| fig-height: 4
degree_df %>%
  kbl() %>%
  kable_paper("hover", full_width = F)%>%
  row_spec(0, bold = T, color = "white", background = "#D7261E")
Measure Value
Mean 1.287965
Quartile 0.50 (Median) 1.000000
Quartile 0.90 2.000000
Quartile 0.95 3.000000
Maximum 120.000000
Show the code
# Filter edges based on betweenness centrality threshold
edges_top_betweenness <- mc3_graph %>%
  filter(betweenness_centrality > 100000) %>%
  activate(edges)%>%
  as.tibble()%>%
  select(from, to, type, weights)
Show the code
nodes_top_betweenness <- mc3_graph %>% filter(betweenness_centrality>100000)%>%
  activate(nodes) %>%
  as.tibble() %>%
  rename(label = id) %>%
  mutate(id=row_number()) %>%
  select(id, label, type, country)
Show the code
nodes_top_betweenness <- nodes_top_betweenness %>% mutate(group = ifelse(is.na(type), "NA", type))
vis_plot <- visNetwork(nodes_top_betweenness, edges_top_betweenness,
           main = "Interactive Network Graph")%>%
  visIgraphLayout(layout = "layout_with_kk", smooth = FALSE,            
                  physics = TRUE            
                ) %>%
  visNodes(size = 50, label=nodes_top_betweenness$label) %>%
  visEdges(color = list(highlight = "lightgray"), arrows = 'to') %>%
  visOptions(selectedBy = "type",
             highlightNearest = list(enabled = TRUE,
                                     degree = 1,
                                     hover = TRUE,
                                     labelOnly = TRUE),
             nodesIdSelection = TRUE) %>%
  visGroups(groupname = "Company", color = "lightblue") %>%
  visGroups(groupname = "Company Contacts", color = "salmon") %>%
  visGroups(groupname = "Beneficial Owner", color = "yellow") %>%
  visGroups(groupname = "NA", color = "grey") %>%
  visLegend(width = 0.1) %>%
  visPhysics(repulsion = list(springlength = 50),
             maxVelocity = 2,
             solver = "forceAtlas2Based",
             forceAtlas2Based = list(gravitationalConstant = -1000),
             timestep = 0.25) %>%
  visLayout(randomSeed=4)
vis_plot
Show the code
# Filter edges based on degree centrality threshold
edges_top_degree <- mc3_graph %>%
  filter(degree_centrality >= 4) %>%
  activate(edges)%>%
  as.tibble()%>%
  select(from, to, type, weights)
Show the code
# Filter nodes based on degree centrality threshold
nodes_top_degree <- mc3_graph %>%
  filter(degree_centrality >= 4) %>%
  activate(nodes) %>%
  as.tibble() %>%
  rename(label = id) %>%
  mutate(id=row_number()) %>%
  select(id, label, type, country)
Show the code
nodes_top_degree <- nodes_top_degree %>% mutate(group = ifelse(is.na(type), "NA", type))
vis_plot <- visNetwork(nodes_top_degree, edges_top_degree,
           main = "Interactive Network Graph")%>%
  visIgraphLayout(layout = "layout_with_kk", smooth = FALSE,            
                  physics = TRUE            
                ) %>%
  visNodes(size = 50, label=nodes_top_degree$label) %>%
  visEdges(color = list(highlight = "lightgray"), arrows = 'to') %>%
  visOptions(selectedBy = "type",
             highlightNearest = list(enabled = TRUE,
                                     degree = 1,
                                     hover = TRUE,
                                     labelOnly = TRUE),
             nodesIdSelection = TRUE) %>%
  visGroups(groupname = "Company", color = "lightblue") %>%
  visGroups(groupname = "Company Contacts", color = "salmon") %>%
  visGroups(groupname = "Beneficial Owner", color = "yellow") %>%
  visGroups(groupname = "NA", color = "grey") %>%
  visLegend(width = 0.1) %>%
  visPhysics(repulsion = list(springlength = 50),
             maxVelocity = 2,
             solver = "forceAtlas2Based",
             forceAtlas2Based = list(gravitationalConstant = -1000),
             timestep = 0.25) %>%
  visLayout(randomSeed=4)
vis_plot
Show the code
owner_count <- mc3_edges %>%
  filter(type == "Beneficial Owner") %>%
  group_by(target) %>%
  summarise(count = n()) %>%
  filter(count >2) %>%
  arrange(desc(count))

DT::datatable(owner_count,options = list(lengthMenu = c(3, 10, 20)))
Show the code
mean_counts <- mean(owner_count$count, na.rm = TRUE)
quartiles_counts <- quantile(owner_count$count, probs = c(0.5,0.95), na.rm = TRUE)

# Create a data frame for betweenness centrality
count_df <- data.frame(
  Measure = c("Mean", "Quartile 0.50 (Median)", "Quartile 0.95"),
  Value = c(mean_counts, quartiles_counts)
)

count_df %>%
  kbl() %>%
  kable_paper("hover", full_width = F) %>%
  row_spec(0, bold = T, color = "white", background = "#D7261E")
Measure Value
Mean 3.472574
50% Quartile 0.50 (Median) 3.000000
95% Quartile 0.95 5.000000
Show the code
target_list <- owner_count$target[owner_count$count >= 5] 
Show the code
top_owners_edges <- mc3_edges[mc3_edges$target %in% target_list, ]
Show the code
id1_owners <- top_owners_edges %>%
  select(source) %>%
  rename(id = source)
id2_owners <- top_owners_edges %>%
  select(target) %>%
  rename(id = target)
top_owners_nodes <- rbind(id1_owners, id2_owners) %>% 
  distinct() %>%
  left_join(mc3_nodes1, unmatched='drop')
Show the code
top_owners_graph <- tbl_graph(nodes = top_owners_nodes,
                       edges = top_owners_edges,
                       directed = FALSE) %>%
  mutate(betweenness_centrality = centrality_betweenness(),
         degree_centrality = centrality_degree())
Show the code
top_owners_edges_df <- top_owners_graph %>%
  activate(edges) %>%
  as_tibble()
top_owners_nodes_df <- top_owners_graph %>%
  activate(nodes) %>%
  as.tibble() %>%
  rename(label = id) %>%
  mutate(id=row_number()) %>%
  select(id, label, type, country)
Show the code
visNetwork(top_owners_nodes_df,
           top_owners_edges_df) %>%
  visIgraphLayout(layout = "layout_with_kk") %>%
  visOptions(highlightNearest = list(enabled = TRUE,
                                     degree = 1,
                                     hover = TRUE,
                                     labelOnly = TRUE),
             nodesIdSelection = TRUE) %>%
  visEdges(arrows = "to")